from transformers import TextGenerationPipeline, GPT2LMHeadModel, GPT2TokenizerFast
import torch

model_name = r"D:\AIModels\llm\Fengshenbang\Wenzhong-GPT2-110M-chinese-v2"  # raw string so the Windows backslashes are not treated as escape sequences

model = GPT2LMHeadModel.from_pretrained(model_name)
tokenizer = GPT2TokenizerFast.from_pretrained(model_name)

DEVICE = "cuda" if torch.cuda.is_available() else "cpu"

text_generator = TextGenerationPipeline(model, tokenizer, device=DEVICE)  # run generation on GPU if available, otherwise CPU

for i in range(3):
    print(text_generator("你好,好久不见嗯！",
                         max_length=100,  # maximum total length (prompt + generated tokens)
                         # truncate=True,  # whether to truncate: if True, overlong input is truncated; if False, it is returned as-is
                         do_sample=True  # whether to sample randomly: if True, sampling follows the top_k/top_p values; if False, the single highest-probability continuation is returned
                        ))
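
# The do_sample comment above mentions top_k / top_p. A minimal sketch of passing those
# sampling knobs explicitly through the pipeline; the values below are illustrative
# assumptions, not settings tuned for this model.
outputs = text_generator("你好,好久不见嗯！",
                         max_length=100,
                         do_sample=True,
                         top_k=50,                 # sample only from the 50 most likely next tokens
                         top_p=0.95,               # nucleus sampling: keep the smallest token set with cumulative probability >= 0.95
                         temperature=0.8,          # < 1 sharpens the distribution, > 1 flattens it
                         num_return_sequences=3)   # request 3 samples in a single call instead of looping
for o in outputs:
    print(o["generated_text"])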